*********************************************
* Title: rwanda_jde_table2.do
* Author: Todd Pugatch
* Last update: June 10 2024
* Description: analysis for Blimpo and Pugatch, "Entrepreneurship Education
*	and Teacher Training in Rwanda," Stage 2 Registered report, Journal of 
*	Development Economics
* Inputs: 	headteacher_merge_jde.dta
*			teacher_merge_jde.dta
*			student_merge_jde.dta
* Outputs: 	rwanda_jde_table2.[txt/xls]
* Notes: produces Table 2
**********************************************

* Record the wall-clock start time; it is displayed at the end of the do-file.
local start=`"$S_TIME"'

* Start from a clean session: drop data, matrices, Mata objects, graphs and
* programs, close any log left open (cap: no error if none is open), and
* disable -more- pauses so the do-file runs unattended.
clear
clear matrix
clear mata
graph drop _all
program drop _all
cap log close
set more off

* Set directories 
*global main "[SET MAIN DIRECTORY HERE]"

* Every path below is built from $main. Stop with a clear message if it has
* not been defined (here or by a calling do-file); otherwise the paths would
* silently resolve to "/01_data/..." and fail later with a confusing file error.
if `"$main"' == "" {
	di as error "global main is not set: define it above (or in a calling do-file) before running"
	exit 198
}
	global rawdata "$main/01_data/01_raw"
	global cleandata "$main/01_data/02_clean"
	global dofiles "$main/02_dofiles"
	global results "$main/03_results"
	global temp "$main/04_output"

* begin log file (plain text, overwritten on each run)
log using "$temp/rwanda_jde_table2.txt", text replace

/*BASELINE BALANCE TESTS: 
	--first get means by treatment status and unadjusted difference
	--then adjust p-values for stratification
	--then run F-test of treatment regressed on all observables, first within 
		HT/teacher/student datasets, then overall*/
	
* HEAD TEACHER DATA
* Loads the head teacher file, writes unadjusted and strata-adjusted balance
* tables, runs a joint F-test of treatment on all covariates, and saves the
* imputed covariates for the later omnibus regression.
qui use "$cleandata/headteacher_merge_jde.dta", clear

/*head teacher data: variables for balance tests*/
local X "boarding enroll_s4_m_bl enroll_s4_f_bl teach_us_bl teach_absent_pct_bl electricity_now_bl ht_skillslab_definition_bl ht_pedagogy_active_bl"
	
* update variable labels so they work with "orth_out"
lab var boarding "BHQ109: boarding school"
lab var teach_absent_pct_bl "BHQ213/BHQ210: teacher absence %"
lab var electricity_now_bl "BHQ216: electricity at school"
lab var ht_skillslab_definition_bl "BHQ611: knows Skills Lab def."
lab var ht_pedagogy_active_bl "BHQ614: interactive pedagogy"

/*head teacher data: conduct balance test:
	--first get unadjusted diffs, then append adjusted diffs to same table
	--the first call starts a fresh workbook; the second appends horizontally*/
orth_out `X' using "$results/rwanda_jde_table2.xls", by(treatment) se compare count colnum title("head teacher baseline, unadjusted") replace
xi: orth_out `X' using "$results/rwanda_jde_table2.xls", by(treatment) se compare count colnum test covar(i.strata) title("head teacher baseline, adjusted") happend replace
	
/*F-test on all head teacher variables*/
/*first handle missing data:
	1. impute values of missing observations (use control group mean)
	2. create dummy for missing (to include in regression)
  missing() is used rather than ==. so that extended missing values (.a-.z)
  are also imputed and flagged instead of silently dropping the observation
  from the regression. The lists Xi (imputed) and Xm (missing dummies) are
  built inside the loop so they always match X.*/
local Xi ""
local Xm ""
foreach x of local X {
	qui gen `x'i=`x'
	qui su `x' if treatment==0
	qui replace `x'i=r(mean) if missing(`x')
	qui gen `x'm=missing(`x')
	lab var `x'i "`x', missing imputed to control group mean"
	lab var `x'm "missing value for `x'"
	local Xi `Xi' `x'i
	local Xm `Xm' `x'm
}
	
/*Then: 	1. regress treatment on all covariates and dummies for missing
			2. jointly test significance of all covariates and missing dummies*/
qui xi: reg treatment `Xi' `Xm' i.strata
testparm `Xi' `Xm'

/*save for later omnibus regression*/
keep school_code treatment strata `Xi' `Xm'
qui save "$temp/httemp.dta", replace

* TEACHER DATA
* Loads the teacher file (baseline sample only), writes unadjusted and
* strata-adjusted balance tables, runs a joint F-test of treatment on all
* covariates, and saves the imputed covariates for the later omnibus regression.

qui use "$cleandata/teacher_merge_jde.dta", clear

/*keep only teachers in baseline*/
qui keep if insample_bl==1

/*teacher data: variables for balance tests*/
local X "female_bl age_bl qualified_bl entre_lessonplan_shown_bl pedagogy_active_bl profit_calculation_bl profit_definition_bl otherjob_bl"

* update variable labels so they work with "orth_out"
lab var entre_lessonplan_shown_bl "BTQ224: showed lesson plan"
lab var pedagogy_active_bl "BTQ300: interactive pedagogy"
lab var profit_calculation_bl "BTQ402: can calculate profit"
lab var profit_definition_bl "BTQ405: knows profit def."
lab var otherjob_bl "BTQ600: moonlighting"

/*teacher data: conduct balance test:
	--first get unadjusted diffs, then append adjusted diffs to same table
	--both calls append horizontally to the workbook started by the head
	  teacher section*/

orth_out `X' using "$results/rwanda_jde_table2.xls", by(treatment) se compare count colnum title("teacher baseline, unadjusted") happend replace 
xi: orth_out `X' using "$results/rwanda_jde_table2.xls", by(treatment) se compare count colnum test covar(i.strata) title("teacher baseline, adjusted") happend replace
	
/*F-test on all teacher variables*/
/*first handle missing data:
	1. impute values of missing observations (use control group mean)
	2. create dummy for missing (to include in regression)
  missing() is used rather than ==. so that extended missing values (.a-.z)
  are also imputed and flagged instead of silently dropping the observation
  from the regression. The lists Xi (imputed) and Xm (missing dummies) are
  built inside the loop so they always match X.*/
local Xi ""
local Xm ""
foreach x of local X {
	qui gen `x'i=`x'
	qui su `x' if treatment==0
	qui replace `x'i=r(mean) if missing(`x')
	qui gen `x'm=missing(`x')
	lab var `x'i "`x', missing imputed to control group mean"
	lab var `x'm "missing value for `x'"
	local Xi `Xi' `x'i
	local Xm `Xm' `x'm
}
	
/*Then: 	1. regress treatment on all covariates and dummies for missing
			2. jointly test significance of all covariates and missing dummies*/
qui xi: reg treatment `Xi' `Xm' i.strata
testparm `Xi' `Xm'

/*save for later omnibus regression
  NOTE(review): the later 1:1 merge on school_code assumes one baseline
  teacher per school -- confirm against the data*/
keep school_code treatment strata `Xi' `Xm'
qui save "$temp/teachertemp.dta", replace

* STUDENT DATA
* Loads the student file, writes unadjusted and strata-adjusted balance tables
* (s.e.'s clustered by school), runs joint F-tests of treatment on the
* covariates, and saves school-level means for the later omnibus regression.
qui use "$cleandata/student_merge_jde.dta", clear

/*student data: variables for balance tests*/
local X "female assets_pct_bl moth_primary_ormore_bl repeat_S4_bl S3_exam_bl job_holiday_bl compound_interest_bl anysavings_bl profit_calculation_bl planned_schl_postsec_bl planned_business_bl grit_raw_bl"

* update variable labels so they work with "orth_out"
* NOTE(review): the code "BSQ100" for planned_schl_postsec_bl breaks the
* BSQ1002 / BSQ1102 numbering around it; possibly BSQ1100 -- confirm against
* the questionnaire before changing.
lab var assets_pct_bl "BSQ306: household assets"
lab var moth_primary_ormore_bl "BSQ310: mother primary or more"	
lab var S3_exam_bl "BSQ404: S3 exam score"
lab var job_holiday_bl "BSQ700: worked last holiday"	
lab var compound_interest_bl "BSQ803: knows compound interest"
lab var profit_calculation_bl "BSQ1002: can calculate profit"
lab var planned_schl_postsec_bl "BSQ100: plans post-secondary"
lab var planned_business_bl "BSQ1102: plans to start business"
lab var grit_raw_bl "BSQ1300-1303: grit index (1=lowest, 5=highest)"

/*student data: conduct balance test:
	--first get unadjusted diffs, then append adjusted diffs to same table
	--cluster s.e.'s by school*/
orth_out `X' using "$results/rwanda_jde_table2.xls", by(treatment) se vce(cluster school_code) compare count colnum title("student baseline, unadjusted") happend replace
	
xi: orth_out `X' using "$results/rwanda_jde_table2.xls", by(treatment) se vce(cluster school_code) compare count colnum test covar(i.strata) title("student baseline, adjusted") happend replace

/*F-test on all student variables*/
/*first handle missing data:
	1. impute values of missing observations (use control group mean)
	2. create dummy for missing (to include in regression)
  missing() is used rather than ==. so that extended missing values (.a-.z)
  are also imputed and flagged instead of silently dropping the observation
  from the regression. The lists Xi (imputed) and Xm (missing dummies) are
  built inside the loop so they always match X.*/
local Xi ""
local Xm ""
foreach x of local X {
	qui gen `x'i=`x'
	qui su `x' if treatment==0
	qui replace `x'i=r(mean) if missing(`x')
	qui gen `x'm=missing(`x')
	lab var `x'i "`x', missing imputed to control group mean"
	lab var `x'm "missing value for `x'"
	local Xi `Xi' `x'i
	local Xm `Xm' `x'm
}
	
/*Then: 	1. regress treatment on all covariates and dummies for missing
			2. jointly test significance of all covariates and missing dummies*/	
qui xi: reg treatment `Xi' `Xm' i.strata, cluster(school_code)
testparm `Xi' `Xm'

/*repeat omnibus test, omitting imbalanced student X's: female, employment, grit
  (the reduced lists are the full lists minus those three variables, order kept)*/
local omiti "femalei job_holiday_bli grit_raw_bli"
local omitm "femalem job_holiday_blm grit_raw_blm"
local Xir : list Xi - omiti
local Xmr : list Xm - omitm

qui xi: reg treatment `Xir' `Xmr' i.strata, cluster(school_code)
testparm `Xir' `Xmr'	
	
/*collapse by school for later omnibus regression
  (keeps only school_code and the school means of the imputed covariates and
  missing dummies; treatment and strata are not carried in this file)*/
qui collapse (mean) `Xi' `Xm', by(school_code)
qui save "$temp/studenttemp.dta", replace

* COMBINE HT/TEACHER/STUDENT BASELINE DATASETS FOR OVERALL F-TEST OF BALANCE
* Builds one school-level file from the three temp files, then regresses
* treatment on all imputed covariates and missing dummies (plus strata) and
* jointly tests them.
qui use "$temp/httemp.dta", clear
qui merge 1:1 school_code using "$temp/teachertemp.dta", update replace
drop _merge

* Give teacher variables teacher-specific names before adding student data.
* profit_calculation_bli/_blm exist in BOTH the teacher and the student files:
* without the rename, the "update replace" merge below overwrites the teacher
* values with student school means and the regression lists the same variable
* twice, so teacher profit calculation drops out of the omnibus test.
* (The student file uses femalei/femalem, so the female rename is for naming
* clarity rather than to avoid a clash.)
ren female_bli female_teacher_bli
ren female_blm female_teacher_blm
ren profit_calculation_bli profit_calculation_teacher_bli
ren profit_calculation_blm profit_calculation_teacher_blm
qui merge 1:1 school_code using "$temp/studenttemp.dta", update replace
drop _merge

* omnibus test
local Xiht "boardingi enroll_s4_m_bli enroll_s4_f_bli teach_us_bli teach_absent_pct_bli electricity_now_bli ht_skillslab_definition_bli ht_pedagogy_active_bli"
local Xmht "boardingm enroll_s4_m_blm enroll_s4_f_blm teach_us_blm teach_absent_pct_blm electricity_now_blm ht_skillslab_definition_blm ht_pedagogy_active_blm"
local Xiteach "female_teacher_bli age_bli qualified_bli entre_lessonplan_shown_bli pedagogy_active_bli profit_calculation_teacher_bli profit_definition_bli otherjob_bli"
local Xmteach "female_teacher_blm age_blm qualified_blm entre_lessonplan_shown_blm pedagogy_active_blm profit_calculation_teacher_blm profit_definition_blm otherjob_blm"
local Xistudent "femalei assets_pct_bli moth_primary_ormore_bli repeat_S4_bli S3_exam_bli job_holiday_bli compound_interest_bli anysavings_bli profit_calculation_bli planned_schl_postsec_bli planned_business_bli grit_raw_bli"
local Xmstudent "femalem assets_pct_blm moth_primary_ormore_blm repeat_S4_blm S3_exam_blm job_holiday_blm compound_interest_blm anysavings_blm profit_calculation_blm planned_schl_postsec_blm planned_business_blm grit_raw_blm"


qui xi: reg treatment `Xiht' `Xiteach' `Xistudent' `Xmht' `Xmteach' `Xmstudent' i.strata
testparm `Xiht' `Xiteach' `Xistudent' `Xmht' `Xmteach' `Xmstudent'

* repeat omnibus test, omitting imbalanced student X's: female, employment, grit
* (the student lists are redefined; head teacher and teacher lists are reused)
local Xistudent "assets_pct_bli moth_primary_ormore_bli repeat_S4_bli S3_exam_bli compound_interest_bli anysavings_bli profit_calculation_bli planned_schl_postsec_bli planned_business_bli"
local Xmstudent "assets_pct_blm moth_primary_ormore_blm repeat_S4_blm S3_exam_blm compound_interest_blm anysavings_blm profit_calculation_blm planned_schl_postsec_blm planned_business_blm"

qui xi: reg treatment `Xiht' `Xiteach' `Xistudent' `Xmht' `Xmteach' `Xmstudent' i.strata
testparm `Xiht' `Xiteach' `Xistudent' `Xmht' `Xmteach' `Xmstudent'
 
* Delete the three intermediate files written to $temp by the sections above.
local stubs "ht teacher student"
foreach stub of local stubs {
	erase "$temp/`stub'temp.dta"
}

* Show the start and end clock times of the run, then close the log.
local end = `"$S_TIME"'
display "`start'"
display "`end'"
log close
